## this code chunk fits the dimension reduction methods

# Hyperparameter grids: one t-SNE fit per perplexity value and one UMAP fit
# per n_neighbors value.
TSNE_PERPLEXITIES <- c(10, 30, 60, 100, 300)
UMAP_N_NEIGHBORS <- c(10, 30, 60, 100, 300)

# Named list of dimension reduction functions. Each element is later called
# with a training data set as its only argument; the element names serve as
# the method labels.
#
# `!!` pins the hyperparameter value at the moment the partial function is
# created. Without it, purrr::partial() stores the argument *expression* and
# re-evaluates it on every call, which ties each function to the lambda's
# environment instead of to a fixed value.
dr_fun_ls <- c(
  list("PCA" = fit_pca),
  purrr::map(
    TSNE_PERPLEXITIES,
    \(perplexity) {
      purrr::partial(fit_tsne, dims = 2, perplexity = !!perplexity)
    }
  ) |>
    setNames(sprintf("tSNE (perplexity = %d)", TSNE_PERPLEXITIES)),
  purrr::map(
    UMAP_N_NEIGHBORS,
    \(n_neighbors) {
      purrr::partial(fit_umap, dims = 2, n_neighbors = !!n_neighbors)
    }
  ) |>
    setNames(sprintf("UMAP (n_neighbors = %d)", UMAP_N_NEIGHBORS))
)
# Location of the cached fits.
# NOTE(review): the cache is keyed on this file name only, so an existing
# file is reused even when the method list or the training data have changed;
# delete the file to force a refit.
fit_results_fname <- file.path(RESULTS_PATH, "dimension_reduction_fits.rds")
if (file.exists(fit_results_fname)) {
  # read in dimension reduction fits (already cached)
  dr_fit_ls <- readRDS(fit_results_fname)
} else {
  # Make sure the output directory exists *before* fitting, so the fits are
  # not thrown away by a failing saveRDS() once they have been computed.
  dir.create(
    dirname(fit_results_fname),
    recursive = TRUE,
    showWarnings = FALSE
  )
  # Fit every method on every training data set. The result is a nested list:
  # outer level follows train_data_ls, inner level follows dr_fun_ls.
  dr_fit_ls <- purrr::map(
    train_data_ls,
    \(train_data) purrr::map(dr_fun_ls, \(dr_fun) dr_fun(train_data))
  )
  # save dimension reduction fits
  saveRDS(dr_fit_ls, file = fit_results_fname)
}
# Aggregate all dimension reduction results into one df.
# list_flatten() removes the outer (training data set) level and labels each
# fit as "<method> [<data set>]". For every fit, the first two score columns
# are kept and renamed, the GC_NAME/GLAT/GLON columns of metadata$train are
# attached by row position, and a running row id is added.
# NOTE(review): presumably `scores` is a data frame whose rows are in the same
# order as metadata$train -- confirm against the fit_* functions.
plt_df <- purrr::list_flatten(dr_fit_ls, name_spec = "{inner} [{outer}]") |>
  purrr::map(
    \(dr_fit) {
      dr_fit$scores[, 1:2] |>
        setNames(sprintf("Component %d", 1:2)) |>
        dplyr::bind_cols(
          metadata$train |> dplyr::select(GC_NAME, GLAT, GLON)
        ) |>
        dplyr::mutate(
          # seq_len() rather than 1:n(): stays empty for a zero-row table
          # instead of producing c(1, 0)
          id = seq_len(dplyr::n())
        )
    }
  ) |>
  dr_results_to_df()